library("FRESA.CAD")
library(readxl)
library(igraph)
library(umap)
library(tsne)
library(entropy)
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)
Data Source https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6960825/
Mohino-Herranz I, Gil-Pita R, Rosa-Zurera M, Seoane F. Activity Recognition Using Wearable Physiological Measurements: Selection of Features from a Comprehensive Literature Study. Sensors (Basel). 2019 Dec 13;19(24):0. doi: 10.3390/s19245524. PMID: 31847261; PMCID: PMC6960825.
Activitydata <- read.csv("~/GitHub/LatentBiomarkers/Data/ActivityData/data.txt", header=FALSE, stringsAsFactors=TRUE)
featNames <- read.table("~/GitHub/LatentBiomarkers/Data/ActivityData/Featurelabels.txt", quote="\"", comment.char="")
featNames <- as.character(t(featNames))
featNames <- str_replace_all(featNames,"\\(abs\\)","_A_")
featNames[duplicated(featNames)] <- paste(featNames[duplicated(featNames)],"D",sep="_")
rep_ID <- numeric(nrow(Activitydata))
ctr <- 1
for (ind in c(1:(nrow(Activitydata)-1)))
{
rep_ID[ind] <- ctr
if (Activitydata$V1[ind] != Activitydata$V1[ind+1]) ctr <- 0;
ctr <- ctr + 1
}
rownames(Activitydata) <- paste(Activitydata$V1,rep_ID,sep="_")
colnames(Activitydata) <- c("ID",featNames,"class")
Activitydata$rep <- rep_ID
tb <- table(Activitydata$class)
classes <- c("Neu","Emo","Men","Phy")
names(classes) <- names(tb)
Activitydata$class <- classes[as.character(Activitydata$class)]
table(Activitydata$class)
#>
#> Emo Men Neu Phy
#> 1120 1120 1120 1120
ID_class <- paste(Activitydata$ID,Activitydata$class)
IDCLASS <- unique(ID_class)
theclass <- Activitydata$class[!duplicated(ID_class)]
theIDs <- Activitydata$ID[!duplicated(ID_class)]
ActivitydataAvg <- NULL
for (id in IDCLASS)
{
ActivitydataAvg <- rbind(ActivitydataAvg,apply(Activitydata[ID_class==id,featNames],2,mean))
}
colnames(ActivitydataAvg) <- featNames
rownames(ActivitydataAvg) <- IDCLASS
ActivitydataAvg <- as.data.frame(ActivitydataAvg)
ActivitydataAvg$class <- theclass
ActivitydataAvg$ID <- theIDs
table(ActivitydataAvg$class)
#>
#> Emo Men Neu Phy
#> 40 40 40 40
ActivitydataAvg <- subset(ActivitydataAvg, class=="Men" | class=="Emo")
ActivitydataAvg$class <- 1*(ActivitydataAvg$class == "Men")
table(ActivitydataAvg$class)
#>
#> 0 1
#> 40 40
studyName <- "Activity"
dataframe <- ActivitydataAvg
outcome <- "class"
TopVariables <- 10
thro <- 0.80
cexheat = 0.15
Some libraries
library(psych)
library(whitening)
library("vioplot")
library("rpart")
pander::pander(c(rows=nrow(dataframe),col=ncol(dataframe)-1))
| rows | col |
|---|---|
| 80 | 534 |
pander::pander(table(dataframe[,outcome]))
| 0 | 1 |
|---|---|
| 40 | 40 |
varlist <- colnames(dataframe)
varlist <- varlist[varlist != outcome]
largeSet <- length(varlist) > 1500
Scaling and removing near zero variance columns and highly co-linear(r>0.99999) columns
### Some global cleaning
sdiszero <- apply(dataframe,2,sd) > 1.0e-16
dataframe <- dataframe[,sdiszero]
varlist <- colnames(dataframe)[colnames(dataframe) != outcome]
tokeep <- c(as.character(correlated_Remove(dataframe,varlist,thr=0.99999)),outcome)
dataframe <- dataframe[,tokeep]
varlist <- colnames(dataframe)
varlist <- varlist[varlist != outcome]
iscontinous <- sapply(apply(dataframe,2,unique),length) > 5 ## Only variables with enough samples
dataframeScaled <- FRESAScale(dataframe,method="OrderLogit")$scaledData
numsub <- nrow(dataframe)
if (numsub > 1000) numsub <- 1000
if (!largeSet)
{
hm <- heatMaps(data=dataframeScaled[1:numsub,],
Outcome=outcome,
Scale=TRUE,
hCluster = "row",
xlab="Feature",
ylab="Sample",
srtCol=45,
srtRow=45,
cexCol=cexheat,
cexRow=cexheat
)
par(op)
}
The heat map of the data
if (!largeSet)
{
par(cex=0.6,cex.main=0.85,cex.axis=0.7)
#cormat <- Rfast::cora(as.matrix(dataframe[,varlist]),large=TRUE)
cormat <- cor(dataframe[,varlist],method="pearson")
cormat[is.na(cormat)] <- 0
gplots::heatmap.2(abs(cormat),
trace = "none",
# scale = "row",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "Original Correlation",
cexRow = cexheat,
cexCol = cexheat,
srtCol=45,
srtRow=45,
key.title=NA,
key.xlab="|Pearson Correlation|",
xlab="Feature", ylab="Feature")
diag(cormat) <- 0
print(max(abs(cormat)))
}
[1]
1
DEdataframe <- IDeA(dataframe,verbose=TRUE,thr=thro)
#>
#> Included: 362 , Uni p: 0.0184537 , Uncorrelated Base: 33 , Outcome-Driven Size: 0 , Base Size: 33
#>
#>
1 <R=1.000,r=0.975,N= 255>, Top: 43( 1 )[ 1 : 43 Fa= 42 : 0.975 ]( 42 , 169 , 0 ),<|>Tot Used: 211 , Added: 169 , Zero Std: 0 , Max Cor: 1.000
#>
2 <R=1.000,r=0.975,N= 255>, Top: 32( 1 )[ 1 : 32 Fa= 70 : 0.975 ]( 32 , 95 , 42 ),<|>Tot Used: 242 , Added: 95 , Zero Std: 0 , Max Cor: 1.000
#>
3 <R=1.000,r=0.975,N= 255>, Top: 14( 2 )[ 1 : 14 Fa= 82 : 0.975 ]( 14 , 28 , 70 ),<|>Tot Used: 246 , Added: 28 , Zero Std: 0 , Max Cor: 1.000
#>
4 <R=1.000,r=0.975,N= 255>, Top: 8( 8 )[ 1 : 8 Fa= 89 : 0.975 ]( 8 , 23 , 82 ),<|>Tot Used: 246 , Added: 23 , Zero Std: 0 , Max Cor: 1.000
#>
5 <R=1.000,r=0.950,N= 115>, Top: 41( 4 )[ 1 : 41 Fa= 101 : 0.950 ]( 38 , 62 , 89 ),<|>Tot Used: 274 , Added: 62 , Zero Std: 0 , Max Cor: 1.000
#>
6 <R=1.000,r=0.950,N= 115>, Top: 14( 1 )[ 1 : 14 Fa= 109 : 0.950 ]( 14 , 18 , 101 ),<|>Tot Used: 274 , Added: 18 , Zero Std: 0 , Max Cor: 1.000
#>
7 <R=1.000,r=0.950,N= 115>, Top: 3( 4 )[ 1 : 3 Fa= 110 : 0.950 ]( 3 , 6 , 109 ),<|>Tot Used: 274 , Added: 6 , Zero Std: 0 , Max Cor: 1.000
#>
8 <R=1.000,r=0.900,N= 134>, Top: 48( 4 )[ 1 : 48 Fa= 117 : 0.900 ]( 44 , 69 , 110 ),<|>Tot Used: 295 , Added: 69 , Zero Std: 0 , Max Cor: 1.000
#>
9 <R=1.000,r=0.900,N= 134>, Top: 11( 4 )[ 1 : 11 Fa= 120 : 0.900 ]( 9 , 13 , 117 ),<|>Tot Used: 299 , Added: 13 , Zero Std: 0 , Max Cor: 1.000
#>
10 <R=1.000,r=0.850,N= 109>, Top: 45( 3 )[ 1 : 45 Fa= 138 : 0.860 ]( 44 , 59 , 120 ),<|>Tot Used: 321 , Added: 59 , Zero Std: 0 , Max Cor: 0.995
#>
11 <R=0.995,r=0.847,N= 109>, Top: 8( 2 )[ 1 : 8 Fa= 140 : 0.847 ]( 8 , 9 , 138 ),<|>Tot Used: 321 , Added: 9 , Zero Std: 0 , Max Cor: 0.961
#>
12 <R=0.961,r=0.800,N= 99>, Top: 40( 2 )[ 1 : 40 Fa= 145 : 0.800 ]( 39 , 55 , 140 ),<|>Tot Used: 329 , Added: 55 , Zero Std: 0 , Max Cor: 0.982
#>
13 <R=0.982,r=0.800,N= 99>, Top: 15( 1 )[ 1 : 15 Fa= 148 : 0.800 ]( 15 , 20 , 145 ),<|>Tot Used: 330 , Added: 20 , Zero Std: 0 , Max Cor: 0.999
#>
14 <R=0.999,r=0.800,N= 99>, Top: 6( 1 )[ 1 : 6 Fa= 151 : 0.800 ]( 6 , 6 , 148 ),<|>Tot Used: 330 , Added: 6 , Zero Std: 0 , Max Cor: 0.966
#>
15 <R=0.966,r=0.800,N= 2>, Top: 1( 1 )[ 1 : 1 Fa= 152 : 0.800 ]( 1 , 1 , 151 ),<|>Tot Used: 330 , Added: 1 , Zero Std: 0 , Max Cor: 0.800
#>
16 <R=0.800,r=0.800,N= 0>
#>
[ 16 ], 0.8965018 Decor Dimension: 330 Nused: 330 . Cor to Base: 197 , ABase: 16 , Outcome Base: 0
#>
varlistc <- colnames(DEdataframe)[colnames(DEdataframe) != outcome]
pander::pander(sum(apply(dataframe[,varlist],2,var)))
2.43e+21
pander::pander(sum(apply(DEdataframe[,varlistc],2,var)))
5.08e+14
pander::pander(entropy(discretize(unlist(dataframe[,varlist]), 256)))
0.0195
pander::pander(entropy(discretize(unlist(DEdataframe[,varlistc]), 256)))
0.0691
if (!largeSet)
{
par(cex=0.6,cex.main=0.85,cex.axis=0.7)
UPSTM <- attr(DEdataframe,"UPSTM")
gplots::heatmap.2(1.0*(abs(UPSTM)>0),
trace = "none",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "Decorrelation matrix",
cexRow = cexheat,
cexCol = cexheat,
srtCol=45,
srtRow=45,
key.title=NA,
key.xlab="|Beta|>0",
xlab="Output Feature", ylab="Input Feature")
par(op)
}
if (!largeSet)
{
cormat <- cor(DEdataframe[,varlistc],method="pearson")
cormat[is.na(cormat)] <- 0
gplots::heatmap.2(abs(cormat),
trace = "none",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "Correlation after IDeA",
cexRow = cexheat,
cexCol = cexheat,
srtCol=45,
srtRow=45,
key.title=NA,
key.xlab="|Pearson Correlation|",
xlab="Feature", ylab="Feature")
par(op)
diag(cormat) <- 0
print(max(abs(cormat)))
}
[1]
1
if (nrow(dataframe) < 1000)
{
classes <- unique(dataframe[1:numsub,outcome])
raincolors <- rainbow(length(classes))
names(raincolors) <- classes
datasetframe.umap = umap(scale(dataframe[1:numsub,varlist]),n_components=2)
plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: Original",t='n')
text(datasetframe.umap$layout,labels=dataframe[1:numsub,outcome],col=raincolors[dataframe[1:numsub,outcome]+1])
}
if (nrow(dataframe) < 1000)
{
datasetframe.umap = umap(scale(DEdataframe[1:numsub,varlistc]),n_components=2)
plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: After IDeA",t='n')
text(datasetframe.umap$layout,labels=DEdataframe[1:numsub,outcome],col=raincolors[DEdataframe[1:numsub,outcome]+1])
}
univarRAW <- uniRankVar(varlist,
paste(outcome,"~1"),
outcome,
dataframe,
rankingTest="AUC")
100 : ECG_p_LF_mean 200 : IT_CCV_LF 300 : EDA_Original_mad_D
univarDe <- uniRankVar(varlistc,
paste(outcome,"~1"),
outcome,
DEdataframe,
rankingTest="AUC",
)
100 : La_ECG_p_LF_mean 200 : La_IT_CCV_LF 300 : La_EDA_Original_mad_D
univariate_columns <- c("caseMean","caseStd","controlMean","controlStd","controlKSP","ROCAUC")
##topfive
topvar <- c(1:length(varlist)) <= TopVariables
pander::pander(univarRAW$orderframe[topvar,univariate_columns])
| caseMean | caseStd | controlMean | controlStd | controlKSP | ROCAUC | |
|---|---|---|---|---|---|---|
| ECG_hrv_prctile75 | -2.719 | 8.9741 | -5.8408 | 10.9387 | 0.00378 | 0.731 |
| ECG_hrv_geomean_A_ | 10.370 | 8.0477 | 13.8743 | 7.8682 | 0.00822 | 0.727 |
| IT_LF_baseline_D | 43.569 | 25.1600 | 26.4345 | 15.3449 | 0.61672 | 0.721 |
| IT_p_Total_baseline | 51.974 | 29.4230 | 31.5389 | 17.9800 | 0.71076 | 0.721 |
| IT_VLF_baseline | 57.578 | 32.3488 | 34.9418 | 19.7865 | 0.75500 | 0.720 |
| ECG_hrv_prctile25 | -12.471 | 7.7963 | -16.6349 | 6.7770 | 0.20654 | 0.719 |
| IT_PSD_baseline | 0.059 | 0.0376 | 0.0358 | 0.0228 | 0.21000 | 0.715 |
| ECG_hrv_mean | -7.294 | 5.4920 | -11.0369 | 6.0390 | 0.33833 | 0.714 |
| IT_HF_baseline | 3.308 | 3.5341 | 2.0033 | 2.1158 | 0.00564 | 0.713 |
| ECG_hrv_trimmean25 | -7.619 | 6.1613 | -11.3761 | 6.3193 | 0.35724 | 0.711 |
topLAvar <- univarDe$orderframe$Name[str_detect(univarDe$orderframe$Name,"La_")]
topLAvar <- unique(c(univarDe$orderframe$Name[topvar],topLAvar[1:as.integer(TopVariables/2)]))
finalTable <- univarDe$orderframe[topLAvar,univariate_columns]
theLaVar <- rownames(finalTable)[str_detect(rownames(finalTable),"La_")]
pander::pander(univarDe$orderframe[topLAvar,univariate_columns])
| caseMean | caseStd | controlMean | controlStd | controlKSP | ROCAUC | |
|---|---|---|---|---|---|---|
| La_ECG_RR_window_baseline | 19.766 | 11.454 | 10.652 | 10.519 | 0.62646 | 0.776 |
| La_EDA_Original_mad_D | -7.578 | 10.562 | 1.170 | 9.564 | 0.01215 | 0.776 |
| La_ECG_HR_min_div_std | 0.533 | 0.674 | 1.130 | 0.819 | 0.22297 | 0.762 |
| La_IT_BRV_baseline | -3.930 | 2.265 | -2.114 | 1.544 | 0.80056 | 0.756 |
| La_EDA_Original_baseline_D | -558.607 | 1630.312 | 714.724 | 1166.346 | 0.03382 | 0.746 |
| La_ECG_RR_window_harmmean | 0.595 | 0.380 | 0.375 | 0.411 | 0.04531 | 0.734 |
| La_EDA_Original_max_D | 127.623 | 285.226 | -44.023 | 189.364 | 0.00307 | 0.731 |
| La_ECG_RR_window_mad | 5.227 | 3.767 | 8.361 | 5.205 | 0.64092 | 0.722 |
| La_EDA_processed_trimmean25_D | 0.999 | 2.270 | -0.379 | 1.094 | 0.15207 | 0.721 |
| La_EDA_Original_std_D | -15.390 | 75.369 | 74.185 | 136.981 | 0.01610 | 0.719 |
dc <- getLatentCoefficients(DEdataframe)
fscores <- attr(DEdataframe,"fscore")
theSigDc <- dc[theLaVar]
names(theSigDc) <- NULL
theSigDc <- unique(names(unlist(theSigDc)))
theFormulas <- dc[rownames(finalTable)]
deFromula <- character(length(theFormulas))
names(deFromula) <- rownames(finalTable)
pander::pander(c(mean=mean(sapply(dc,length)),total=length(dc),fraction=length(dc)/(ncol(dataframe)-1)))
| mean | total | fraction |
|---|---|---|
| 3.33 | 301 | 0.825 |
allSigvars <- names(dc)
dx <- names(deFromula)[1]
for (dx in names(deFromula))
{
coef <- theFormulas[[dx]]
cname <- names(theFormulas[[dx]])
names(cname) <- cname
for (cf in names(coef))
{
if (cf != dx)
{
if (coef[cf]>0)
{
deFromula[dx] <- paste(deFromula[dx],
sprintf("+ %5.3f*%s",coef[cf],cname[cf]))
}
else
{
deFromula[dx] <- paste(deFromula[dx],
sprintf("%5.3f*%s",coef[cf],cname[cf]))
}
}
}
}
finalTable <- rbind(finalTable,univarRAW$orderframe[theSigDc[!(theSigDc %in% rownames(finalTable))],univariate_columns])
orgnamez <- rownames(finalTable)
orgnamez <- str_remove_all(orgnamez,"La_")
finalTable$RAWAUC <- univarRAW$orderframe[orgnamez,"ROCAUC"]
finalTable$DecorFormula <- deFromula[rownames(finalTable)]
finalTable$fscores <- fscores[rownames(finalTable)]
Final_Columns <- c("DecorFormula","caseMean","caseStd","controlMean","controlStd","controlKSP","ROCAUC","RAWAUC","fscores")
finalTable <- finalTable[order(-finalTable$ROCAUC),]
pander::pander(finalTable[,Final_Columns])
| DecorFormula | caseMean | caseStd | controlMean | controlStd | controlKSP | ROCAUC | RAWAUC | fscores | |
|---|---|---|---|---|---|---|---|---|---|
| La_ECG_RR_window_baseline | -0.927ECG_RR_window_mean + 1.000ECG_RR_window_baseline | 19.766 | 11.454 | 10.652 | 10.519 | 0.626465 | 0.776 | 0.522 | -1 |
| La_EDA_Original_mad_D | -0.831EDA_Original_std_D + 1.000EDA_Original_mad_D | -7.578 | 10.562 | 1.170 | 9.564 | 0.012150 | 0.776 | 0.507 | 0 |
| La_ECG_HR_min_div_std | + 1.000ECG_HR_min_div_std -2.612ECG_hrv_std + 1.827*ECG_hrv_mad | 0.533 | 0.674 | 1.130 | 0.819 | 0.222968 | 0.762 | 0.579 | -2 |
| La_IT_BRV_baseline | -0.588IT_BRV_mean + 1.000IT_BRV_baseline | -3.930 | 2.265 | -2.114 | 1.544 | 0.800556 | 0.756 | 0.642 | -1 |
| La_EDA_Original_baseline_D | -0.895EDA_Original_mean_D + 1.000EDA_Original_baseline_D | -558.607 | 1630.312 | 714.724 | 1166.346 | 0.033817 | 0.746 | 0.504 | -1 |
| La_ECG_RR_window_harmmean | + 0.715ECG_RR_window_mean -1.710ECG_RR_window_geomean_A_ + 1.000ECG_RR_window_harmmean + 0.702ECG_hrv_mean -0.664*ECG_hrv_trimmean25 | 0.595 | 0.380 | 0.375 | 0.411 | 0.045307 | 0.734 | 0.603 | -3 |
| La_EDA_Original_max_D | -4.171EDA_Original_mean_D + 1.000EDA_Original_max_D + 3.174*EDA_Original_prctile25_D | 127.623 | 285.226 | -44.023 | 189.364 | 0.003074 | 0.731 | 0.575 | -2 |
| La_ECG_RR_window_mad | + 1.000ECG_RR_window_mad -1.741ECG_hrv_mad | 5.227 | 3.767 | 8.361 | 5.205 | 0.640925 | 0.722 | 0.637 | 2 |
| La_EDA_processed_trimmean25_D | + 1.000EDA_processed_trimmean25_D -0.652EDA_processed_median_D | 0.999 | 2.270 | -0.379 | 1.094 | 0.152073 | 0.721 | 0.582 | -1 |
| La_EDA_Original_std_D | + 1.000EDA_Original_std_D -2.861EDA_processed_std_D | -15.390 | 75.369 | 74.185 | 136.981 | 0.016099 | 0.719 | 0.510 | 3 |
| ECG_hrv_mean | NA | -7.294 | 5.492 | -11.037 | 6.039 | 0.338332 | 0.714 | 0.714 | 5 |
| ECG_hrv_trimmean25 | NA | -7.619 | 6.161 | -11.376 | 6.319 | 0.357244 | 0.711 | 0.711 | NA |
| IT_BRV_baseline | NA | -2.921 | 4.580 | -1.086 | 5.893 | 0.082008 | 0.642 | 0.642 | NA |
| ECG_RR_window_mad | NA | 15.323 | 11.931 | 19.343 | 15.318 | 0.016755 | 0.637 | 0.637 | NA |
| ECG_RR_window_mean | NA | 213.082 | 36.977 | 225.302 | 36.955 | 0.959974 | 0.608 | 0.608 | 10 |
| ECG_RR_window_geomean_A_ | NA | 211.244 | 38.262 | 222.696 | 38.877 | 0.944821 | 0.603 | 0.603 | NA |
| ECG_RR_window_harmmean | NA | 209.548 | 39.257 | 220.307 | 40.355 | 0.933015 | 0.603 | 0.603 | NA |
| EDA_processed_trimmean25_D | NA | -2.813 | 7.265 | -3.429 | 6.705 | 0.007616 | 0.582 | 0.582 | NA |
| EDA_processed_std_D | NA | 90.946 | 122.230 | 58.264 | 59.707 | 0.014159 | 0.581 | 0.581 | NA |
| ECG_HR_min_div_std | NA | 8.572 | 9.261 | 9.524 | 10.152 | 0.000285 | 0.579 | 0.579 | NA |
| EDA_Original_max_D | NA | 6539.842 | 7764.396 | 4983.886 | 6585.632 | 0.003534 | 0.575 | 0.575 | NA |
| EDA_Original_prctile25_D | NA | 5745.512 | 7290.266 | 4265.562 | 6006.293 | 0.006278 | 0.573 | 0.573 | NA |
| EDA_Original_mean_D | NA | 5909.653 | 7389.707 | 4451.522 | 6159.561 | 0.005912 | 0.569 | 0.569 | 18 |
| EDA_processed_median_D | NA | -5.842 | 11.119 | -4.675 | 9.408 | 0.007767 | 0.530 | 0.530 | NA |
| ECG_RR_window_baseline | NA | 217.376 | 37.213 | 219.594 | 36.241 | 0.705034 | 0.522 | 0.522 | NA |
| ECG_hrv_std | NA | 7.132 | 8.339 | 7.624 | 9.462 | 0.000275 | 0.514 | 0.514 | NA |
| ECG_hrv_mad | NA | 5.799 | 6.907 | 6.308 | 8.000 | 0.000175 | 0.514 | 0.514 | 40 |
| EDA_Original_std_D | NA | 244.767 | 321.426 | 240.854 | 284.853 | 0.015456 | 0.510 | 0.510 | NA |
| EDA_Original_mad_D | NA | 195.764 | 262.605 | 201.261 | 242.021 | 0.012852 | 0.507 | 0.507 | NA |
| EDA_Original_baseline_D | NA | 4732.181 | 6363.016 | 4700.077 | 6278.106 | 0.003312 | 0.504 | 0.504 | NA |
| IT_BRV_mean | NA | 1.717 | 8.347 | 1.749 | 8.360 | 0.270100 | 0.503 | 0.503 | 21 |
featuresnames <- colnames(dataframe)[colnames(dataframe) != outcome]
pc <- prcomp(dataframe[,iscontinous],center = TRUE,tol=0.002) #principal components
predPCA <- predict(pc,dataframe[,iscontinous])
PCAdataframe <- as.data.frame(cbind(predPCA,dataframe[,!iscontinous]))
colnames(PCAdataframe) <- c(colnames(predPCA),colnames(dataframe)[!iscontinous])
#plot(PCAdataframe[,colnames(PCAdataframe)!=outcome],col=dataframe[,outcome],cex=0.65,cex.lab=0.5,cex.axis=0.75,cex.sub=0.5,cex.main=0.75)
#pander::pander(pc$rotation)
PCACor <- cor(PCAdataframe[,colnames(PCAdataframe) != outcome])
gplots::heatmap.2(abs(PCACor),
trace = "none",
# scale = "row",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "PCA Correlation",
cexRow = 0.5,
cexCol = 0.5,
srtCol=45,
srtRow= -45,
key.title=NA,
key.xlab="Pearson Correlation",
xlab="Feature", ylab="Feature")
EFAdataframe <- dataframeScaled
if (length(iscontinous) < 2000)
{
topred <- min(length(iscontinous),nrow(dataframeScaled),ncol(predPCA)/2)
if (topred < 2) topred <- 2
uls <- fa(dataframeScaled[,iscontinous],nfactors=topred,rotate="varimax",warnings=FALSE) # EFA analysis
predEFA <- predict(uls,dataframeScaled[,iscontinous])
EFAdataframe <- as.data.frame(cbind(predEFA,dataframeScaled[,!iscontinous]))
colnames(EFAdataframe) <- c(colnames(predEFA),colnames(dataframeScaled)[!iscontinous])
EFACor <- cor(EFAdataframe[,colnames(EFAdataframe) != outcome])
gplots::heatmap.2(abs(EFACor),
trace = "none",
# scale = "row",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "EFA Correlation",
cexRow = 0.5,
cexCol = 0.5,
srtCol=45,
srtRow= -45,
key.title=NA,
key.xlab="Pearson Correlation",
xlab="Feature", ylab="Feature")
}
par(op)
par(xpd = TRUE)
dataframe[,outcome] <- factor(dataframe[,outcome])
rawmodel <- rpart(paste(outcome,"~."),dataframe,control=rpart.control(maxdepth=3))
pr <- predict(rawmodel,dataframe,type = "class")
ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
if (length(unique(pr))>1)
{
plot(rawmodel,main="Raw",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
text(rawmodel, use.n = TRUE,cex=0.75)
ptab <- epiR::epi.tests(table(pr==0,dataframe[,outcome]==0))
}
pander::pander(table(dataframe[,outcome],pr))
| 0 | 1 | |
|---|---|---|
| 0 | 38 | 2 |
| 1 | 7 | 33 |
pander::pander(ptab)
detail:
| statistic | est | lower | upper |
|---|---|---|---|
| ap | 0.4375 | 0.32676 | 0.553 |
| tp | 0.5000 | 0.38605 | 0.614 |
| se | 0.8250 | 0.67221 | 0.927 |
| sp | 0.9500 | 0.83080 | 0.994 |
| diag.ac | 0.8875 | 0.79715 | 0.947 |
| diag.or | 89.5714 | 17.38881 | 461.391 |
| nndx | 1.2903 | 1.08636 | 1.988 |
| youden | 0.7750 | 0.50301 | 0.921 |
| pv.pos | 0.9429 | 0.80843 | 0.993 |
| pv.neg | 0.8444 | 0.70545 | 0.935 |
| lr.pos | 16.5000 | 4.24197 | 64.180 |
| lr.neg | 0.1842 | 0.09364 | 0.362 |
| p.rout | 0.5625 | 0.44700 | 0.673 |
| p.rin | 0.4375 | 0.32676 | 0.553 |
| p.tpdn | 0.0500 | 0.00611 | 0.169 |
| p.tndp | 0.1750 | 0.07338 | 0.328 |
| p.dntp | 0.0571 | 0.00700 | 0.192 |
| p.dptn | 0.1556 | 0.06491 | 0.295 |
tab:
| Outcome + | Outcome - | Total | |
|---|---|---|---|
| Test + | 33 | 2 | 35 |
| Test - | 7 | 38 | 45 |
| Total | 40 | 40 | 80 |
method: exact
digits: 2
conf.level: 0.95
pander::pander(ptab$detail[c(5,3,4,6),])
| statistic | est | lower | upper | |
|---|---|---|---|---|
| 5 | diag.ac | 0.887 | 0.797 | 0.947 |
| 3 | se | 0.825 | 0.672 | 0.927 |
| 4 | sp | 0.950 | 0.831 | 0.994 |
| 6 | diag.or | 89.571 | 17.389 | 461.391 |
par(op)
par(xpd = TRUE)
DEdataframe[,outcome] <- factor(DEdataframe[,outcome])
IDeAmodel <- rpart(paste(outcome,"~."),DEdataframe,control=rpart.control(maxdepth=3))
pr <- predict(IDeAmodel,DEdataframe,type = "class")
ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
if (length(unique(pr))>1)
{
plot(IDeAmodel,main="IDeA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
text(IDeAmodel, use.n = TRUE,cex=0.75)
ptab <- epiR::epi.tests(table(pr==0,DEdataframe[,outcome]==0))
}
pander::pander(table(DEdataframe[,outcome],pr))
| 0 | 1 | |
|---|---|---|
| 0 | 34 | 6 |
| 1 | 3 | 37 |
pander::pander(ptab)
detail:
| statistic | est | lower | upper |
|---|---|---|---|
| ap | 0.5375 | 0.4224 | 0.650 |
| tp | 0.5000 | 0.3860 | 0.614 |
| se | 0.9250 | 0.7961 | 0.984 |
| sp | 0.8500 | 0.7016 | 0.943 |
| diag.ac | 0.8875 | 0.7972 | 0.947 |
| diag.or | 69.8889 | 16.1978 | 301.551 |
| nndx | 1.2903 | 1.0786 | 2.009 |
| youden | 0.7750 | 0.4978 | 0.927 |
| pv.pos | 0.8605 | 0.7207 | 0.947 |
| pv.neg | 0.9189 | 0.7809 | 0.983 |
| lr.pos | 6.1667 | 2.9335 | 12.963 |
| lr.neg | 0.0882 | 0.0295 | 0.264 |
| p.rout | 0.4625 | 0.3503 | 0.578 |
| p.rin | 0.5375 | 0.4224 | 0.650 |
| p.tpdn | 0.1500 | 0.0571 | 0.298 |
| p.tndp | 0.0750 | 0.0157 | 0.204 |
| p.dntp | 0.1395 | 0.0530 | 0.279 |
| p.dptn | 0.0811 | 0.0170 | 0.219 |
tab:
| Outcome + | Outcome - | Total | |
|---|---|---|---|
| Test + | 37 | 6 | 43 |
| Test - | 3 | 34 | 37 |
| Total | 40 | 40 | 80 |
method: exact
digits: 2
conf.level: 0.95
pander::pander(ptab$detail[c(5,3,4,6),])
| statistic | est | lower | upper | |
|---|---|---|---|---|
| 5 | diag.ac | 0.887 | 0.797 | 0.947 |
| 3 | se | 0.925 | 0.796 | 0.984 |
| 4 | sp | 0.850 | 0.702 | 0.943 |
| 6 | diag.or | 69.889 | 16.198 | 301.551 |
par(op)
par(xpd = TRUE)
PCAdataframe[,outcome] <- factor(PCAdataframe[,outcome])
PCAmodel <- rpart(paste(outcome,"~."),PCAdataframe,control=rpart.control(maxdepth=3))
pr <- predict(PCAmodel,PCAdataframe,type = "class")
ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
if (length(unique(pr))>1)
{
plot(PCAmodel,main="PCA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
text(PCAmodel, use.n = TRUE,cex=0.75)
ptab <- epiR::epi.tests(table(pr==0,PCAdataframe[,outcome]==0))
}
pander::pander(table(PCAdataframe[,outcome],pr))
| 0 | 1 | |
|---|---|---|
| 0 | 22 | 18 |
| 1 | 7 | 33 |
pander::pander(ptab)
detail:
| statistic | est | lower | upper |
|---|---|---|---|
| ap | 0.637 | 0.5224 | 0.742 |
| tp | 0.500 | 0.3860 | 0.614 |
| se | 0.825 | 0.6722 | 0.927 |
| sp | 0.550 | 0.3849 | 0.707 |
| diag.ac | 0.688 | 0.5741 | 0.787 |
| diag.or | 5.762 | 2.0647 | 16.079 |
| nndx | 2.667 | 1.5772 | 17.508 |
| youden | 0.375 | 0.0571 | 0.634 |
| pv.pos | 0.647 | 0.5007 | 0.776 |
| pv.neg | 0.759 | 0.5646 | 0.897 |
| lr.pos | 1.833 | 1.2649 | 2.657 |
| lr.neg | 0.318 | 0.1535 | 0.660 |
| p.rout | 0.362 | 0.2579 | 0.478 |
| p.rin | 0.637 | 0.5224 | 0.742 |
| p.tpdn | 0.450 | 0.2926 | 0.615 |
| p.tndp | 0.175 | 0.0734 | 0.328 |
| p.dntp | 0.353 | 0.2243 | 0.499 |
| p.dptn | 0.241 | 0.1030 | 0.435 |
tab:
| Outcome + | Outcome - | Total | |
|---|---|---|---|
| Test + | 33 | 18 | 51 |
| Test - | 7 | 22 | 29 |
| Total | 40 | 40 | 80 |
method: exact
digits: 2
conf.level: 0.95
pander::pander(ptab$detail[c(5,3,4,6),])
| statistic | est | lower | upper | |
|---|---|---|---|---|
| 5 | diag.ac | 0.688 | 0.574 | 0.787 |
| 3 | se | 0.825 | 0.672 | 0.927 |
| 4 | sp | 0.550 | 0.385 | 0.707 |
| 6 | diag.or | 5.762 | 2.065 | 16.079 |
par(op)
EFAdataframe[,outcome] <- factor(EFAdataframe[,outcome])
EFAmodel <- rpart(paste(outcome,"~."),EFAdataframe,control=rpart.control(maxdepth=3))
pr <- predict(EFAmodel,EFAdataframe,type = "class")
ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
if (length(unique(pr))>1)
{
plot(EFAmodel,main="EFA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
text(EFAmodel, use.n = TRUE,cex=0.75)
ptab <- epiR::epi.tests(table(pr==0,EFAdataframe[,outcome]==0))
}
pander::pander(table(EFAdataframe[,outcome],pr))
| 0 | 1 | |
|---|---|---|
| 0 | 18 | 22 |
| 1 | 6 | 34 |
pander::pander(ptab)
detail:
| statistic | est | lower | upper |
|---|---|---|---|
| ap | 0.700 | 5.87e-01 | 0.797 |
| tp | 0.500 | 3.86e-01 | 0.614 |
| se | 0.850 | 7.02e-01 | 0.943 |
| sp | 0.450 | 2.93e-01 | 0.615 |
| diag.ac | 0.650 | 5.35e-01 | 0.753 |
| diag.or | 4.636 | 1.59e+00 | 13.494 |
| nndx | 3.333 | -1.73e+02 | 1.792 |
| youden | 0.300 | -5.76e-03 | 0.558 |
| pv.pos | 0.607 | 4.68e-01 | 0.735 |
| pv.neg | 0.750 | 5.33e-01 | 0.902 |
| lr.pos | 1.545 | 1.13e+00 | 2.105 |
| lr.neg | 0.333 | 1.48e-01 | 0.752 |
| p.rout | 0.300 | 2.03e-01 | 0.413 |
| p.rin | 0.700 | 5.87e-01 | 0.797 |
| p.tpdn | 0.550 | 3.85e-01 | 0.707 |
| p.tndp | 0.150 | 5.71e-02 | 0.298 |
| p.dntp | 0.393 | 2.65e-01 | 0.532 |
| p.dptn | 0.250 | 9.77e-02 | 0.467 |
tab:
| Outcome + | Outcome - | Total | |
|---|---|---|---|
| Test + | 34 | 22 | 56 |
| Test - | 6 | 18 | 24 |
| Total | 40 | 40 | 80 |
method: exact
digits: 2
conf.level: 0.95
pander::pander(ptab$detail[c(5,3,4,6),])
| statistic | est | lower | upper | |
|---|---|---|---|---|
| 5 | diag.ac | 0.65 | 0.535 | 0.753 |
| 3 | se | 0.85 | 0.702 | 0.943 |
| 4 | sp | 0.45 | 0.293 | 0.615 |
| 6 | diag.or | 4.64 | 1.593 | 13.494 |
par(op)